﻿using PanGu;
using SimHashBusiness.Interfaces;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;


namespace SimHashBusiness.Tokenisers
{
    /// <summary>
    /// Tokeniser that segments a whole sentence into words using the
    /// PanGu (Lucene.Net) Chinese word segmenter.
    /// </summary>
    public class LuceneStringTokeniser : ITokeniser
    {
        /// <summary>
        /// Splits <paramref name="input"/> into individual words via PanGu segmentation.
        /// </summary>
        /// <param name="input">The sentence to segment. Must not be null.</param>
        /// <returns>The segmented words, in the order PanGu produced them.</returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="input"/> is null (previously this surfaced as a
        /// NullReferenceException from inside the PanGu library).
        /// </exception>
        public IEnumerable<string> Tokenise(string input)
        {
            // Fail fast at the API boundary rather than letting null propagate
            // into the third-party segmenter.
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }

            // NOTE(review): a new Segment is created per call; PanGu's thread-safety
            // for a shared instance is not established here, so per-call construction
            // is kept deliberately.
            Segment segment = new Segment();

            // Materialise with ToArray so callers can enumerate the result multiple
            // times without re-running segmentation.
            return segment.DoSegment(input).Select(x => x.Word).ToArray();
        }
    }
}
